clear all
set more off
set mem 10000000
set matsize 10000
version 15

****************************************************************************
*** FUZZY RDROBUST, SECC outcomes: splits on hours of rural power supply ***
****************************************************************************

** Set file paths
do "$path_code/paths.do"

** Set graph scheme
cd "$path/code/analyze"
set scheme fb, perm

****************************************************************** 
****************************************************************** 

{
use "$panel/panel_dataset_full.dta", clear

	// Summarize hours of power to electrified villages, by district
egen temp1 = mean(vdp_pwr_h_all_avg) if vdp_pwr_h_all_avg_11>0, by(st_code dt_code)
egen HRS_all_wide = mode(temp1), by(st_code dt_code)
egen temp2 = mean(vdp_pwr_h_dom_avg) if vdp_pwr_h_dom_avg_11>0, by(st_code dt_code)
egen HRS_dom_wide = mode(temp2), by(st_code dt_code)
egen temp3 = mean(vdp_pwr_h_all_avg) if vdp_pwr_h_all_avg_11>0 & inrange(tot_p,150,450), by(st_code dt_code)
egen HRS_all_rd = mode(temp3), by(st_code dt_code)
egen temp4 = mean(vdp_pwr_h_dom_avg) if vdp_pwr_h_dom_avg_11>0  & inrange(tot_p,150,450), by(st_code dt_code)
egen HRS_dom_rd = mode(temp4), by(st_code dt_code)

	// Keep villages in RD sample
gen in_rf_sample = vplan4<11 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 
gen in_fs_sample = vplan4<11 & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 
keep if in_rf_sample==1 | in_fs_sample==1
keep if HRS_all_wide>=10 & HRS_all_wide!=.	

	// Merge in SECC outcomes at the village level
merge 1:m pca01_id using "$panel/secc_pca_vill_all.dta", keep(3) nogen
gen p_ratio = n_tot/tot_p11
gen hh_ratio = secc_n_hh/no_hh11
keep if p_ratio<=1.10
keep if hh_ratio<=1.10
	
	// Create state FEs (since RD robust doesn't let you pass them through)
drop if st_code==32 // Kerala, only 3 villages
tab st_code if in_fs_sample==1, gen(STFEfs)	
drop STFEfs1 // to avoid collinearity (maybe also drop STFEfs2, since STFEfs1 is Haryana?)

tab st_code if in_fs_sample==1, gen(STFEhfs)	
drop STFEhfs1 //STFEhfs12 // need to additionally drop Karnataka, which barely reports hours of power in cases wher SECC also matches


	// Create district FEs (since RD robust doesn't let you pass them through)
gen stdtFE = stdt
tab stdtFE state     if inlist(stdtFE,15,62,63,64,70,91,101,116,117,138,158,175,180,181,202,209,213,288,308,309,320,433,490,493,494,495,504,505,508)	
replace stdtFE = 999 if inlist(stdtFE,15,62,63,64,70,91,101,116,117,138,158,175,180,181,202,209,213,288,308,309,320,433,490,493,494,495,504,505,508)	
	// one catch-all district FE for districts with so few in-sample villages that they break rdrobust
tab stdtFE if in_fs_sample==1, gen(DTFEfs)	
drop DTFEfs1 // to avoid collinearity

replace stdtFE = 999 if st_code==29	
tab stdtFE if in_fs_sample==1, gen(DTFEhfs)	
drop DTFEhfs1 // to avoid collinearity

	// Keep only fs_sample
keep if in_fs_sample==1	

	// Create block groups (for clustering)
egen stdtbk = group(stdt bk_code)

	// Create lights-difference variable, to identify crazy outliers
gen lights_diff = abs(lights_max2011_hat - lights_max2001_hat)

	// Create apples-to-apples "other" category
foreach g in m f {
	foreach a in _ _16_ {
		gen n_`g'`a'occ_ot = n_`g'`a'occ_oth + n_`g'`a'occ_wrk
	}
}	

	// Definte SECC labor shares for whole population
foreach g in m f {
	gen denom_`g' = n_`g'_age_0_6 + n_`g'_age_7_15 + n_`g'_age_16_25 + n_`g'_age_26_40 + n_`g'_age_41_60 + n_`g'_age_61_plus
	gen work_secc_ag_`g' = n_`g'_occ_agr/denom_`g'
	gen work_secc_hh_`g' = n_`g'_occ_dom/denom_`g'
	gen work_secc_ot_`g' = n_`g'_occ_ot/denom_`g'
	gen work_secc_wk_`g' = n_`g'_occ_wrk/denom_`g'
	gen work_secc_oth_`g' = n_`g'_occ_oth/denom_`g'
}	
	
  // Define SECC-specific reduced-form outcome variables
foreach g in m f {
	foreach v of varlist n_`g'_16_occ* {
		local v1 = subinstr("`v'","_16_","_",1)
		local v2 = subinstr("`v'","_16_","_youth_",1)
		gen `v2' =  `v1' - `v'
		assert `v'!=. & `v2'!=.
		replace `v' = `v'/(n_`g'_age_16_25 + n_`g'_age_26_40 + n_`g'_age_41_60 + n_`g'_age_61_plus)
		replace `v2' = `v2'/(n_`g'_age_0_6 + n_`g'_age_7_15) 
		replace `v' = 0 if `v'==.
		replace `v2' = 0 if `v2'==.
		assert `v'<=1 & `v2'<=1 & `v'>=0 & `v2'>=0 
	}
	foreach v of varlist n_`g'_16_single n_`g'_16_illit n_`g'_16_midsch {
		replace `v' = `v'/(n_`g'_age_16_25 + n_`g'_age_26_40 + n_`g'_age_41_60 + n_`g'_age_61_plus)
		replace `v' = 0 if `v'==.
		assert `v'<=1 & `v'>=0 
	}	
	
	gen n_`g'_16_emp = n_`g'_16_occ_agr + n_`g'_16_occ_dom + n_`g'_16_occ_oth + n_`g'_16_occ_wrk
	gen n_`g'_youth_emp = n_`g'_youth_occ_agr + n_`g'_youth_occ_dom + n_`g'_youth_occ_oth + n_`g'_youth_occ_wrk	
}	

	// Main source of HH income: combine foraging and "other" into "other" category
replace pct_hh_mnth_inc_oth = pct_hh_mnth_inc_oth + pct_hh_mnth_inc_for
replace wpct_hh_mnth_inc_oth = wpct_hh_mnth_inc_oth + wpct_hh_mnth_inc_for
	
	// Convert illiterary to literacy
foreach v of varlist *illit {
	local v2 = subinstr("`v'","illit","lit",1)
	gen `v2' = 1 - `v'
	drop `v'
}	

	// Household head apples-to-apples other category
gen pct_hhh_occ_ot = pct_hhh_occ_oth + pct_hhh_occ_wrk

	// Drop outcomes that aren't useful and will just make the regression loop longer
drop *pct_hh_mnth_inc_10 *pct_hh_mnth_inc_for

	// Create macros for lists of outcome variables 
global yvars = "hh_ratio pct_hh_mnth_inc_5_10 pct_hh_salaried_job pct_hh_own_any_land n_m_16_occ_agr n_f_16_occ_agr n_m_16_occ_dom n_f_16_occ_dom n_m_16_occ_ot n_f_16_occ_ot "

	// Merge in SHRUG outcomes to create index variable
merge 1:1 pca01_id using "$shrug/shrug_secc.dta", keep(1 3) nogen
gen hh_ratio_conv = max(1-hh_ratio,0)
gen secc11_inc_cultiv_share_conv = max(1-secc11_inc_cultiv_share,0)
foreach v of varlist hh_ratio_conv pct_hh_mnth_inc_5_10 pct_hh_salaried_job pct_hh_own_any_land secc11_inc_cultiv_share_conv {
	qui sum `v' if tot_p<=1000
	gen Z`v' = (`v' - r(mean))/(r(sd))
}
egen double index_secc11 = rmean(Z*)
drop Z*


	// Create variables to store regresson results
gen fuzzy_step = .
gen fuzzyvar = ""
gen yvar = ""
gen ifs = ""
gen controls_base = ""
gen control = ""
gen fe = ""
gen kernel = ""
gen bwmethod = ""
gen vce = ""
gen polynomial_order = .
gen beta_conv = .
gen beta_robust = .
gen se_conv = .
gen se_robust = .
gen pval_conv = .
gen pval_robust = .
gen lci_conv = .
gen uci_conv = .
gen lci_robust = .
gen uci_robust = .
gen bw_lo = .
gen bw_hi = .
gen nobs_orig = .
gen nobs_left = .
gen nobs_right = .
gen nobs_total = .
gen ndist = . 
gen ymean = .
gen ftag = ""
gen hrstag = ""


** Massive loop over RDROBUST outcomes and sensitivities

// Loop over endogenous variables 
foreach fuzzy of varlist vdp_pwr_h_com_avg_11 lights_max2011_hat hpca11_msl_elec {
	
	if "`fuzzy'"=="vdp_pwr_h_com_avg_11" {
		local h = "h"
	}
	else {
		local h = "" 
	}
	
	// Loop over outcome variables
	foreach y in $yvars index_secc11 {

		// Prep to store results
		foreach v of varlist fuzzy_step-ftag {
			cap replace `v' = ""
			cap replace `v' = .
		}
		local row = 0

		// Define outcome-specific stuff
		global yvar = "`y'"
		global control = subinstr("$yvar","11","01",1)
		if substr("$yvar",-4,4)=="1101" {
			global control = subinstr("$yvar","1101","01",1)
		}
		qui do "$path/code/analyze/RDROBUST_rf_outcomes_graphspecs.do"

		// Loop through sensitivities
		foreach fuzzy_step in 1 4 5 11 {

			// Reset RDROBUST defaults
			local ifs = "in_fs_sample==1 & pop_mismatch20==0 & lights_diff<20 & HRS_all_wide>=10 & HRS_all_wide!=."
			local controls_base = "lights_max1998 lights_max1999 lights_max2000 lights_max2001 lights_max2002 lights_max2003 lights_max2004 lights_max2005" 
			local fe = "STFE`h'fs*"
			local kernel = "tri"
			local bwmethod = "mserd"
			local vce = ""
			local poly = 1

			// Define step-specific RDROBUST settings
			if `fuzzy_step'==1 {
				local ftag = "preferred"
			}
			if `fuzzy_step'==2 {
				local ftag = "popmismatch"
				local ifs = "in_fs_sample==1 & lights_diff<20"
			}
			if `fuzzy_step'==3 {
				local ftag = "lights outliers"
				local ifs = "in_fs_sample==1 & pop_mismatch20==0"
			}
			if `fuzzy_step'==4 {
				local ftag = "epa kernel"
				local kernel = "epa"
			}
			if `fuzzy_step'==5 {
				local ftag = "uni kernel"
				local kernel = "uni"
			}
			if `fuzzy_step'==6 {
				local ftag = "no lights controls"
				local controls_base = ""
			}
			if `fuzzy_step'==7 {
				local ftag = "no FEs"
				local fe = ""
			}
			if `fuzzy_step'==8 {
				local ftag = "district FEs"
				local fe = "DTFE`h'fs*"
			}
			if `fuzzy_step'==9 {
				local ftag = "nncluster by district"
				local vce = "vce(nncluster stdt)"
			}
			if `fuzzy_step'==10 {
				local ftag = "cluster by district"
				local vce = "vce(cluster stdt)"
			}
			if `fuzzy_step'==11 {
				local ftag = "CERRD bandwidth"
				local bwmethod = "cerrd"
			}

			local hrstag = "over10hrs"

			// Use capture since some might break RDROBUST
			cap {
			
				// Run fuzzy RD regresssion
				rdrobust $yvar tot_p if `ifs', c(299.5) fuzzy(`fuzzy') covs(`controls_base' $control `fe') kernel(`kernel') bwselect(`bwmethod') p(`poly') all `vce'

				// Generate in-sample indicator and store bandwidths
				cap drop temp_in_reg
				qui gen temp_in_reg = `ifs' & inrange(tot_p,299.5-e(h_l),299.5+e(h_r)) & ${yvar}!=.

				// Store results
				local row = `row' + 1
				qui replace fuzzy_step = `fuzzy_step' in `row'
				qui replace fuzzyvar = "`fuzzy'" in `row'
				qui replace yvar = "$yvar" in `row'
				qui replace ifs = "`ifs'" in `row'
				qui replace controls_base = "`controls_base'" in `row'
				qui replace control = "$control" in `row'
				qui replace fe = "`fe'" in `row'
				qui replace kernel = e(kernel) in `row'
				qui replace bwmethod = e(bwselect) in `row'
				qui replace vce = "`vce'" in `row'
				qui replace polynomial_order = e(p) in `row'
				qui replace beta_conv = e(tau_cl) in `row'
				qui replace beta_robust = e(tau_bc) in `row'
				qui replace se_conv = e(se_tau_cl) in `row'
				qui replace se_robust = e(se_tau_rb) in `row'
				qui replace pval_conv = e(pv_cl) in `row'
				qui replace pval_robust = e(pv_rb) in `row'
				qui replace lci_conv = e(ci_l_cl) in `row'
				qui replace uci_conv = e(ci_r_cl) in `row'
				qui replace lci_robust = e(ci_l_rb) in `row'
				qui replace uci_robust = e(ci_r_rb) in `row'
				qui replace bw_lo = e(h_l) in `row'
				qui replace bw_hi = e(h_r) in `row'
				qui replace nobs_orig = e(N) in `row'
				qui replace nobs_left = e(N_h_l) in `row'
				qui replace nobs_right = e(N_h_r) in `row'
				qui replace nobs_total = e(N_h_l) + e(N_h_r) in `row'
				qui unique stdt if temp_in_reg==1
				qui replace ndist = r(unique) in `row'
				qui sum $yvar if temp_in_reg==1 & tot_p<299.5
				qui replace ymean = r(mean) in `row'
				qui replace ftag = "`ftag'" in `row'
				qui replace hrstag = "`hrstag'" in `row'

			}	

		// Intermediate output
		di "`fuzzy'    $yvar    `fuzzy_step'   "    c(current_time)

		}

		
		// Save results
		preserve
		keep fuzzy_step-hrstag
		dropmiss, obs force
		if !("$yvar"=="hh_ratio" & "`fuzzy'"=="vdp_pwr_h_com_avg_11") {
			tempfile reg_results
			save `reg_results'
			clear
			append using "$results/RDROBUST_outcomes_fuzzy_secc_hrs.dta" `reg_results'
		}
		duplicates drop
		compress
		save "$results/RDROBUST_outcomes_fuzzy_secc_hrs.dta", replace
		restore
		
	}

}

}

****************************************************************** 
****************************************************************** 

